import pandas as pd
import networkx as nx
import community
import matplotlib.pyplot as plt
import numpy as np
# Load the Zachary karate club edge list into an undirected graph.
# A context manager guarantees the file handle is closed even if parsing fails.
with open("D:\\Johanna\\Ecole\\Imperial\\Network Analysis\\HW3\\zachary_karate_club.txt", 'rb') as fh:
    G = nx.read_edgelist(fh)
Then, we will use the networkx `girvan_newman` algorithm with betweenness as the centrality measure.
import itertools
def print_girvan_communities(G, l, k):
    """Draw every k-th Girvan-Newman partition of G, up to l dendrogram levels.

    Parameters
    ----------
    G : networkx graph to partition.
    l : number of Girvan-Newman levels to iterate over (level i yields i+2 communities).
    k : step between plotted levels; only levels 0, k, 2k, ... are drawn.
    """
    comm = nx.algorithms.community.centrality.girvan_newman(G)
    # Integer floor division replaces int((l-1)/k+1): same result, no float detour.
    n_plots = (l - 1) // k + 1
    fig, ax = plt.subplots(1, n_plots, figsize=(15, 5), clear=True)
    fig.suptitle('Communities graph using girvan algorithm', fontsize=16)
    for i, c in enumerate(itertools.islice(comm, l)):
        if i % k != 0:
            # Skip levels between the plotted snapshots (the old code
            # incremented i in both branches; enumerate does it for us).
            continue
        part = tuple(sorted(n) for n in c)
        # Colour each node by the index of the community that contains it.
        values = []
        for n in G.nodes:
            j = 0
            while j < len(part) and n not in part[j]:
                j += 1
            values.append(j)
        axis = ax[i // k]
        nx.draw_spring(G, ax=axis, cmap=plt.get_cmap('jet'),
                       node_color=values, node_size=40, width=0.25,
                       with_labels=False)
        # Level i of the dendrogram has 2 + i communities.
        axis.set_title('k = ' + str(2 + i))
# Plot the partitions with 2 and 5 communities (levels 0 and 3 of 6, step 3).
print_girvan_communities(G, 6, 3)
def print_best_partition(G):
    """Plot G with each node coloured by its Louvain best-modularity community."""
    partition = community.best_partition(G)
    colours = [partition.get(v) for v in G.nodes()]
    fig, ax = plt.subplots(1, 1, figsize=(7.5, 7.5), clear=True)
    fig.suptitle('Community graph using the best modularity partition', fontsize=14)
    nx.draw_spring(G, cmap=plt.get_cmap('jet'), node_color=colours,
                   node_size=50, width=0.25, with_labels=False)
# Louvain community structure of the karate-club graph.
print_best_partition(G)
The skeleton of these functions comes from https://stackoverflow.com/questions/43541376/how-to-draw-communities-with-networkx.
def community_layout(g, partition, cs, ns):
    """Compute node positions that visually group communities together.

    cs scales the layout of the community "super-graph"; ns scales the
    layout of the nodes inside each community.  A node's final position
    is the sum of its community's position and its own local offset.
    """
    macro = _position_communities(g, partition, scale=cs)
    micro = _position_nodes(g, partition, scale=ns)
    return {node: macro[node] + micro[node] for node in g.nodes()}
def _position_communities(g, partition, **kwargs):
    """Position every community as a single node of a community-level graph.

    Builds a 'hypergraph' whose nodes are the communities, with an edge
    whenever at least one inter-community edge exists in g, lays it out
    with a spring layout, then gives each original node the position of
    its community.  kwargs (e.g. scale) are forwarded to spring_layout.
    """
    between_community_edges = _find_between_community_edges(g, partition)
    communities = set(partition.values())
    hypergraph = nx.Graph()
    hypergraph.add_nodes_from(communities)
    # Edge weights/lists are not needed here, only which pairs are connected.
    for (ci, cj) in between_community_edges:
        hypergraph.add_edge(ci, cj)
    # Find a layout for the communities themselves.
    pos_communities = nx.spring_layout(hypergraph, **kwargs)
    # Loop variable renamed from 'community' to avoid shadowing the
    # imported 'community' module; the leftover debug print is removed.
    pos = dict()
    for node, comm in partition.items():
        pos[node] = pos_communities[comm]
    return pos
def _find_between_community_edges(g, partition):
edges = dict()
for (ni, nj) in g.edges():
ci = partition[ni]
cj = partition[nj]
if ci != cj:
try:
edges[(ci, cj)] += [(ni, nj)]
except KeyError:
edges[(ci, cj)] = [(ni, nj)]
return edges
def _position_nodes(g, partition, **kwargs):
    """Position nodes with a spring layout computed inside each community.

    kwargs (e.g. scale) are forwarded to nx.spring_layout.
    """
    # Group the nodes by community (setdefault replaces try/except KeyError);
    # loop variable renamed to avoid shadowing the 'community' module.
    communities = dict()
    for node, comm in partition.items():
        communities.setdefault(comm, []).append(node)
    pos = dict()
    for nodes in communities.values():
        subgraph = g.subgraph(nodes)
        pos.update(nx.spring_layout(subgraph, **kwargs))
    return pos
def test(G, cs, ns):
    """Draw G with the community-aware layout, coloured by Louvain partition.

    cs / ns are the community-level and node-level layout scales passed
    through to community_layout.
    """
    # The unused 'from community import community_louvain' import is removed;
    # the module-level 'community' import already provides best_partition.
    part = community.best_partition(G)
    values = [part.get(node) for node in G.nodes()]
    pos = community_layout(G, part, cs, ns)
    nx.draw(G, pos, cmap=plt.get_cmap('jet'), node_color=values, width=0.1,
            with_labels=True)
# Community-grouped drawing of the karate-club graph.
test(G, 3, 1)
Let us see the community structure. First, we need to put the adjacency list into the graph format.
#The "-" have been removed from the 2 datasets
# Load the who-talks-to-whom table (one column per respondent id).
sent = pd.read_csv("D:\\Johanna\\Ecole\\Imperial\\Network Analysis\\HW2\\HW2_who_talks_to_whom_same.csv")
sent.head()
#Let us fill the NA with 0 and put everything into int format
sent = sent.fillna(0).astype(int)
# Build a symmetric 0/1 adjacency matrix: two people are linked when at
# least one of them reports talking to the other.
# BUG FIX: `[[0]*n]*n` repeats the SAME inner list n times, so writing
# same[i][j] used to set column j in EVERY row.  Build independent rows.
same = [[0] * len(sent) for _ in range(len(sent))]
# NOTE(review): only j == 1 is scanned here (range(1, 2)); presumably this
# was meant to be range(1, len(sent)) to cover every pair -- kept as-is
# pending confirmation of the intent.
for j in range(1, 2):
    for i in range(j, len(sent)):
        if max(sent.at[i, str(j)], sent.at[j, str(i)]) > 0:
            same[i][j] = 1
            same[j][i] = 1
# NOTE(review): np.asmatrix / nx.from_numpy_matrix are deprecated or removed
# in recent NumPy/networkx releases; fine for the versions used here.
same = np.asmatrix(same)
G2 = nx.from_numpy_matrix(same)
# Girvan-Newman snapshots at levels 0, 10 and 20 (2, 12 and 22 communities).
print_girvan_communities(G2, 30, 10)
# Louvain best-modularity partition of the same graph.
part = community.best_partition(G2)
values = [part.get(node) for node in G2.nodes()]
print(set(values))
fig, ax = plt.subplots(1, 1, figsize=(7.5, 7.5), clear = True)
fig.suptitle('Community graph using the best modularity partition', fontsize = 14)
nx.draw_kamada_kawai(G2, cmap = plt.get_cmap('jet'), node_color = values, node_size=60, width = 0.05, with_labels=False)
# Community-grouped drawing (community scale 10, node scale 1).
test(G2, 10, 1)
The Bass model seen in class is pertinent for forecasting new adoption after a product introduction.
In the "Forecasting adoption of a new product" article, we can find a similar explanation of this model. Only a few notations differ: the number of new adopters during period t is called $F_t$ instead of $N_t$, and $S_t$ denotes the actual number of new adopters during period t.
With this in mind, we will try to reproduce the results shown about $The Doctor$.
First, we will enter the data about box office revenues.
# Weekly box-office revenues S_t (the per-week "new adopters", 12 weeks) and
# cumulative revenues C_t (13 entries; C_t[t] is the running total of S_t
# over the first t weeks, starting at 0).
S_t = np.array([0.1, 3., 5.2, 7., 5.25, 4.90, 3., 2.4, 1.9, 1.3, 0.8, 0.6])
C_t = np.array([0, 0.1, 3.1, 8.3, 15.3, 20.55, 25.45, 28.45, 30.85, 32.75, 34.05, 34.85, 35.45])
print(S_t, C_t)
We can now solve the following nonlinear problem.
$\min_{m,\,p,\,q} \sum_{t=1}^{12} (F_t - S_t)^2$
s.t.
$F_t = \left(p + q\,\frac{C_{t-1}}{m}\right)(m - C_{t-1}), \quad t = 1, \dots, 12$
The decision variables are $m$, $p$ and $q$.
def residuals(x, c=None, s=None):
    """Bass-model sum of squared residuals.

    Parameters
    ----------
    x : array-like (m, p, q) -- market size, innovation and imitation rates.
    c : cumulative adoption series; c[t] is the cumulative total before
        period t+1.  Defaults to the module-level C_t.
    s : actual adoptions per period.  Defaults to the module-level S_t.

    Returns sum over t of (F_t - s_t)^2 with
        F_t = (p + q * C_{t-1} / m) * (m - C_{t-1}).
    """
    if c is None:
        c = C_t
    if s is None:
        s = S_t
    # Loop over periods 0..len(s)-1; identical to the original range(1, 13)
    # with the indices shifted by one, but generalized to any series length.
    res = 0.0
    for t in range(len(s)):
        f_t = (x[1] + x[2] * c[t] / x[0]) * (x[0] - c[t])
        res += (f_t - s[t]) ** 2
    return res
from scipy.optimize import minimize, Bounds
# Initial guess (m, p, q) and box bounds 0 <= m <= 40, 0 <= p, q <= 1.
init = np.array([1., 0.25, 0.75])
bounds = Bounds([0, 0, 0], [40, 1, 1])
# Powell is run unbounded; L-BFGS-B and TNC respect the bounds.
sol = minimize(residuals, init , method='Powell')
print(sol)
sol = minimize(residuals, init , method='L-BFGS-B', bounds = bounds)
print(sol)
sol = minimize(residuals, init , method='TNC', bounds = bounds)
print(sol)
We observe that any of the 3 methods could be used to solve the problem. Now, let us try to forecast the evolution of revenues using only the first 4 weeks of data.
def forecast(x, periods=12):
    """Forecast per-period adoptions from Bass parameters.

    Parameters
    ----------
    x : array-like (m, p, q).
    periods : forecast horizon (default 12, matching the original).

    Uses the closed-form Bass cumulative-adoption curve
        C(t) = m * (1 - exp(-(p+q) t)) / (1 + (q/p) * exp(-(p+q) t))
    and returns [F_1, ..., F_periods] with
        F_t = (p + q * C(t-1) / m) * (m - C(t-1)).
    """
    m, p, q = x[0], x[1], x[2]
    # Renamed from C_t so the local series no longer shadows the
    # module-level C_t data array.
    cumulative = [m * (1 - np.exp(-(p + q) * t)) / (1 + q / p * np.exp(-(p + q) * t))
                  for t in range(periods)]
    return [(p + q * cumulative[t] / m) * (m - cumulative[t])
            for t in range(periods)]
def residuals_forecast(x, c=None, s=None, weeks=4):
    """Sum of squared Bass residuals over the first `weeks` weeks only.

    Parameters
    ----------
    x : array-like (m, p, q).
    c, s : cumulative and per-period series; default to the module-level
        C_t and S_t.
    weeks : how many weeks of data to fit (default 4, as in the original).

    NOTE(review): this compares F_t (built from c[i-1]) against s[i],
    whereas `residuals` compares it against s[i-1] -- a one-week offset.
    Kept exactly as in the original; confirm which alignment is intended.
    """
    if c is None:
        c = C_t
    if s is None:
        s = S_t
    res = 0.0
    for i in range(1, weeks + 1):
        f_t = (x[1] + x[2] * c[i - 1] / x[0]) * (x[0] - c[i - 1])
        res += (f_t - s[i]) ** 2
    return res
# Refit the Bass parameters using only the first weeks of data,
# with the same three solvers as before.
sol1 = minimize(residuals_forecast, init , method='Powell')
print(sol1.x)
sol2 = minimize(residuals_forecast, init , method='L-BFGS-B', bounds = bounds)
print(sol2.x)
sol3 = minimize(residuals_forecast, init , method='TNC', bounds = bounds)
print(sol3.x)
Let us show in a graph our 3 predictions and the actual revenues.
import matplotlib.pyplot as plt  # already imported at the top; kept for cell independence
# Weeks 1..12 on the x-axis.
X = [i for i in range(1, 13)]
Y1 = forecast(sol1.x)
Y2 = forecast(sol2.x)
#Y3 = forecast(sol3.x)
# 'sol' holds the fit on the full 12 weeks (the retrospective model).
Y4 = forecast(sol.x)
plt.plot(X, S_t, label = "Actual revenues")
plt.plot(X, Y4, label = 'Retrospective model')
plt.plot(X, Y1, label = "Forecast model 1")
# The label suggests sol2 and sol3 gave near-identical fits (Y3 left out).
plt.plot(X, Y2, label = "Forecast models 2&3")
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)
It appears that all of the forecasting models underestimate the final number of adopters. Consequently, the peak arrives earlier and fades faster in those scenarios.
If we know about investments in companies or projects, it is also possible for national or international banks to invest in the prosperity of other central banks. The risks involved in this way are assessed by means of exposure measurement.
In the years 2007-2008, a financial crisis shook the financial markets, redistributing the risks between the different states. Let's take a look at this development here.
# Cross-border exposure data: rows hold (net_id, from_id, to_id, value).
financial_data = pd.read_csv('D:\\Johanna\\Ecole\\Imperial\\Network Analysis\\HW3\\hw3_pbm3_data.csv')
financial_data.head()
# 'net_id' appears to hold the snapshot dates -- collect them sorted.
date = sorted(list(set(financial_data['net_id'])))
print(date)
The following 2 functions are used to display networks between countries. For ease of understanding, the draw_graph function only takes into account the imbalance of exposure between different countries. The resulting graph is thus directed.
def create_network(G, positions):
    """Draw a copy of G with degree-sized nodes and weight-scaled edges.

    Parameters
    ----------
    G : weighted (directed) graph to draw.
    positions : previously computed layout, or [] to compute a fresh
        circular layout.

    Returns the layout actually used, so successive snapshots can share
    the same node positions and stay visually comparable.
    """
    # Work on a copy to avoid mutating the caller's graph.
    H = G.copy()
    # Extract edges and weights as parallel tuples; scale the weights down
    # so they can be used directly as line widths / colour values.
    edges, weights = zip(*nx.get_edge_attributes(H, 'weight').items())
    weights = tuple(np.asarray(weights) / 500)
    # Keep only nodes with at least one incoming edge; the in-degree
    # drives the marker size.  (Unused indegree/outdegree locals removed.)
    d = H.in_degree()
    nodelist = []
    node_sizes = []
    for node, deg in d:
        if deg > 0:
            nodelist.append(node)
            node_sizes.append(deg)
    # Circular layout works well since almost every node links to the others.
    if positions == []:
        positions = nx.circular_layout(H)
    # Large figure so the dense graph stays readable.
    plt.figure(figsize=(15, 15))
    nx.draw_networkx_nodes(H, positions, node_color='#DA70D6', nodelist=nodelist,
                           # node size grows (cubically) with in-degree
                           node_size=tuple([x ** 3 for x in node_sizes]), alpha=0.8)
    nx.draw_networkx_labels(H, positions, font_size=8,
                            font_family='sans-serif')
    # BUG FIX: the keyword is `edgelist`, not `edge_list`; the old spelling
    # was silently ignored.  Width and colour both encode the edge weight;
    # edge_vmin/edge_vmax pin the colour scale to the weight range.
    nx.draw_networkx_edges(H, positions, edgelist=edges, style='solid',
                           width=weights, edge_color=weights,
                           edge_vmin=min(weights), edge_vmax=max(weights), alpha=0.5)
    # Display the graph without axes.
    plt.axis('off')
    plt.show()
    return positions
def draw_graph(d, positions):
    """Build and draw the directed net-exposure graph for snapshot date d.

    For each ordered country pair only the *imbalance* is kept: when both
    i->j and j->i exposures are reported, a single edge is added from the
    net creditor to the net debtor with the absolute difference as weight.
    Returns the graph and the (possibly newly computed) layout positions.
    """
    G = nx.DiGraph()
    # Restrict to the rows of the requested snapshot.
    dated = (financial_data[financial_data['net_id']==d].drop(columns = 'net_id'))
    for i in set(dated['from_id']):
        for_i = dated[dated['from_id'] == i]
        for j in set(dated['to_id']):
            for_j = dated[dated['from_id'] == j]
            if i != j:
                for_i_j = for_i[for_i['to_id'] == j]
                # Exposure in the opposite direction, j -> i.
                for_j = for_j[for_j['to_id'] == i]
                if not for_i_j.empty:
                    if not for_j.empty :
                        # Both directions reported: keep only the net imbalance
                        # (negative raw values are clipped to 0 first).
                        x = max(0, for_i_j['value'].iloc[0])
                        y = max(0, for_j['value'].iloc[0])
                        v = x - y
                        if v <= 0:
                            edge = [tuple([j, i, -v])]
                        else :
                            edge = [tuple([i, j, v])]
                        G.add_weighted_edges_from(edge)
                    else :
                        # Only i -> j reported: use the raw value as the weight.
                        edge = [tuple([i, j, for_i_j['value'].iloc[0]])]
                        G.add_weighted_edges_from(edge)
    positions = create_network(G, positions)
    return G, positions
To try to understand the overall evolution, let's look at the state of the network every 10 months.
# Draw one snapshot every 10 months, reusing the same layout across
# snapshots so the resulting pictures are comparable.
positions = []
G = []
for k in range(0, len(date), 10) :
    d = date[k]
    # NOTE(review): k == 10 recomputes the layout from scratch for the
    # second snapshot only -- presumably deliberate, but worth confirming.
    if k == 10:
        G1, positions = draw_graph(str(d), [])
        G.append(G1)
    else :
        G1, positions = draw_graph(str(d), positions)
        G.append(G1)
Graphs = G.copy()
We can see that during the financial crisis, confidence in the United States was greatly reduced: its imbalance with other countries shrank. We can also notice that countries became more inclined to invest across several different countries — one should not put all one's eggs in the same basket.
Now let's look at the evolution of the communities.
# Louvain communities for each snapshot; the directed graphs are converted
# to undirected ones, as best_partition works on undirected graphs.
for i in range (len(Graphs)):
    G = nx.Graph(Graphs[i])
    part = community.best_partition(G)
    values = [part.get(node) for node in G.nodes()]
    fig, ax = plt.subplots(1, 1, figsize=(7.5, 7.5), clear = True)
    nx.draw_spring(G, node_color = values, node_size=1000, width = 0.05, with_labels=True)
We do not see any major redistribution in communities despite the financial crisis.
Now let's see if the evolution actually corresponds to the previous visualizations.
# All (from_id, to_id) pairs appearing in the data.
relations = np.array(financial_data[['from_id', 'to_id']])
relations
# Add a tuple column identifying each ordered country pair.
financial_data['couple']=financial_data[['from_id', 'to_id']].apply(lambda row : (row['from_id'], row['to_id']), axis =1)
# Drop the rows of the first snapshot date.
net = financial_data.drop(financial_data[financial_data['net_id']==date[0]].index, axis = 0)
net.head()
net2= net.drop(columns = 'couple')
# Total exposure *sent* by each country at each date (multi-index: from_id, net_id).
net3= net2.groupby(['from_id', 'net_id']).sum()
fig, ax = plt.subplots(1, 1, figsize=(15, 15), clear = True)
# Plot only the big senders (total over the whole period above 500000);
# dates with no row for a country contribute 0.
for x in set(net['from_id']):
    v = []
    if sum(net3.loc[x]['value']) > 500000:
        for i in date :
            if (x,i) in net3.index :
                # NOTE(review): this appends a one-row slice rather than a
                # scalar -- matplotlib copes, but .loc[(x, i), 'value'] would
                # be cleaner; confirm before changing.
                v.append(net3.loc[(x, i)])
            else :
                v.append(0)
        plt.plot(date, v, label = x)
ax.legend()
plt.xticks(rotation=90)
plt.show()
Except for the US and Japan, all the other countries reduced their exposure to other countries.
# Same analysis from the receiving side: total exposure *received* by
# each country at each date (multi-index: to_id, net_id).
net3= net2.groupby(['to_id', 'net_id']).sum()
fig, ax = plt.subplots(1, 1, figsize=(15, 15), clear = True)
# Plot only the big receivers (total over the whole period above 500000).
for x in set(net['to_id']):
    v = []
    if sum(net3.loc[x]['value']) > 500000:
        for i in date :
            if (x,i) in net3.index :
                # NOTE(review): appends a one-row slice, as in the sender plot.
                v.append(net3.loc[(x, i)])
            else :
                v.append(0)
        plt.plot(date, v, label = x)
ax.legend()
plt.xticks(rotation=90)
plt.show()
And the 5 biggest countries are receiving less investment from other countries than before the crisis.